import matplotlib.pylab as plt
from tsfresh import extract_features, select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.feature_extraction import ComprehensiveFCParameters
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np

# Input files, relative to the working directory (whitespace-delimited text).
SIGNAL_PATH = 'UCI HAR Dataset/train/Inertial Signals/body_acc_x_train.txt'
LABEL_PATH = 'UCI HAR Dataset/train/y_train.txt'
N_SAMPLES = 500  # number of leading rows used for the experiment
SEED = 0  # shared seed so both evaluations use the same split


def load_har_sample(signal_path=SIGNAL_PATH, label_path=LABEL_PATH, n=N_SAMPLES):
    """Load the first *n* signal rows and their labels.

    Both files are parsed as header-less, whitespace-delimited tables.

    Args:
        signal_path: Path to the signal file; one row per sample.
        label_path: Path to the single-column label file.
        n: Maximum number of leading rows to keep from each file.

    Returns:
        A ``(signals, labels)`` tuple: a DataFrame with at most *n* rows and
        a Series with the matching labels.

    Raises:
        ValueError: If the two files yield a different number of rows.
    """
    # sep=r'\s+' replaces the deprecated delim_whitespace=True.
    signals = pd.read_csv(signal_path, sep=r'\s+', header=None).iloc[:n]
    # read_csv(squeeze=True) was removed in pandas 2.0; squeeze the single
    # column explicitly instead.
    labels = pd.read_csv(label_path, sep=r'\s+', header=None)
    labels = labels.squeeze('columns').iloc[:n]
    if len(signals) != len(labels):
        raise ValueError(
            'signal/label row count mismatch: %d != %d'
            % (len(signals), len(labels))
        )
    return signals, labels


def to_long_format(signals):
    """Flatten a wide (one row per series) frame into tsfresh's long format.

    Args:
        signals: DataFrame whose rows are individual time series.

    Returns:
        DataFrame with a ``feature`` column holding the values row by row and
        an ``id`` column holding the positional row number of each value.
    """
    n_rows, n_steps = signals.shape
    # Derive ids from the actual row count so the two columns always have the
    # same length, even when the file holds fewer rows than requested.
    return pd.DataFrame({'feature': signals.values.flatten(),
                         'id': np.arange(n_rows).repeat(n_steps)})


def evaluate_features(X, y, seed=SEED):
    """Fit a decision tree on an 80/20 split and return its report.

    Args:
        X: Feature matrix, one row per sample.
        y: Labels aligned with the rows of *X*.
        seed: Random state for both the split and the classifier.

    Returns:
        The text produced by ``classification_report`` on the held-out 20%.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed)
    cl = DecisionTreeClassifier(random_state=seed)
    cl.fit(X_train, y_train)
    return classification_report(y_test, cl.predict(X_test))


def main(n_samples=N_SAMPLES, seed=SEED):
    """Compare a decision tree on tsfresh features vs. the raw signal values."""
    df, y = load_har_sample(n=n_samples)

    # To eyeball one raw series:
    #   plt.title('accelerometer reading'); plt.plot(df.iloc[0, :]); plt.show()

    # With time-series feature engineering.
    extraction_settings = ComprehensiveFCParameters()
    master_df = to_long_format(df)
    X = extract_features(timeseries_container=master_df, n_jobs=0,
                         column_id='id', impute_function=impute,
                         default_fc_parameters=extraction_settings)
    print(evaluate_features(X, y, seed))

    # Without time-series feature engineering: raw values as features.
    # (df is already limited to the first n_samples rows; the removed
    # ``.ix[:N - 1, :]`` selected the same rows.)
    print(evaluate_features(df, y, seed))


if __name__ == '__main__':
    main()